#################################################################################################
# DOES THE CROWD PROMISE TO FUND MORE INNOVATION?
# REPLICATION FILES  
# FIGURES
#################################################################################################

#install.packages("texreg")
library(texreg)
library(plm)
library(stargazer)
library(ggplot2)
library(car)
library(hexbin)
library(foreign)

# Script-wide replacement for log(): positive inputs get the natural log
# (base::log), zero and negative inputs are mapped to 0 instead of -Inf/NaN,
# and NA inputs stay NA. This masks base::log() for everything below.
log <- function(x) {
  ifelse(x > 0, base::log(x), 0)
}

#################################################################################################
# FIGURE 2: RESIDUALS
#################################################################################################
# Load the county-year data and turn it into a plm panel indexed by
# county (fips) and year, so the within (fixed-effects) models below work.
fe <- read.csv("yearly_fe_final.csv", header = TRUE, sep = ",")
fe <- pdata.frame(
  fe,
  index = c("fips", "year"),
  row.names = TRUE,
  drop.index = TRUE
)

# Ln(x + 1) versions of the raw variables. log() is the script's own
# replacement defined above, so non-positive values become 0.

# Venture capital
fe$log_count_vc <- log(fe$vc_count + 1)
fe$log_amount_vc <- log(fe$vc_amount + 1)

# Patenting controls
fe$log_patents <- log(fe$num_patents + 1)
fe$log_citations <- log(fe$num_citations + 1)

# Kickstarter
fe$log_amount_ks <- log(fe$amount + 1)
fe$log_campaigns_ks <- log(fe$num_campaigns + 1)
fe$log_successful_ks <- log(fe$num_successful + 1)
fe$log_instrument_ks <- log(fe$instrument_successful + 1)
fe$log_successful_ks_100 <- log(fe$successful_100 + 1)

# COUNTS
# Two-way fixed-effects regressions of the VC count and the successful
# Kickstarter count on patents and citations; the figure plots the residuals
# of one model against the residuals of the other.
m1 <- plm(
  log_count_vc ~ log_patents + log_citations,
  data = fe, effect = "twoways", model = "within"
)
m2 <- plm(
  log_successful_ks ~ log_patents + log_citations,
  data = fe, effect = "twoways", model = "within"
)

res_vc <- residuals(m1)
res_ks <- residuals(m2)

df <- as.data.frame(cbind(res_vc, res_ks))

# Printed for reference only: the "r==..." label in the plot is typed in by
# hand and is not derived from this value.
cor(res_vc, res_ks)

sp <- ggplot(df, aes(x = res_vc, y = res_ks))
sp +
  stat_binhex() +
  scale_fill_gradient(name = "County-Years", low = "lightblue", high = "red") +
  xlab("VC Funded Companies (Ln)") +
  ylab("Successful KS Campaigns (Ln)") +
  theme_classic() +
  theme(
    text = element_text(size = 20),
    legend.text = element_text(size = 11),
    legend.position = c(0.1, 1),
    legend.justification = c(0, 1)
  ) +
  geom_rug(position = "jitter", size = 0.2) +
  annotate("text", label = "r==0.17", parse = TRUE, x = 0.8, y = 3.5, size = 10) +
  geom_smooth(method = "lm")


#################################################################################################
# FIGURE S5: RESIDUALS 
#################################################################################################
# Same residual-vs-residual figure as Figure 2, but for funding amounts
# (Ln VC amount and Ln Kickstarter amount) instead of counts.
m1 <- plm(
  log_amount_vc ~ log_patents + log_citations,
  data = fe, effect = "twoways", model = "within"
)
m2 <- plm(
  log_amount_ks ~ log_patents + log_citations,
  data = fe, effect = "twoways", model = "within"
)

res_vc <- residuals(m1)
res_ks <- residuals(m2)

df <- as.data.frame(cbind(res_vc, res_ks))

# Printed for reference only: the "r==..." label below is hard-coded.
cor(res_vc, res_ks)

sp <- ggplot(df, aes(x = res_vc, y = res_ks))
sp +
  stat_binhex() +
  scale_fill_gradient(name = "County-Years", low = "lightblue", high = "red") +
  xlab("Amount VC Funding (Ln)") +
  ylab("Amount KS Funding (Ln)") +
  theme_classic() +
  theme(
    legend.background = element_rect(colour = "transparent", fill = "transparent"),
    text = element_text(size = 20),
    legend.text = element_text(size = 11),
    legend.position = c(0.08, 1),
    legend.justification = c(0, 1)
  ) +
  geom_rug(position = "jitter", size = 0.2) +
  annotate("text", label = "r==0.02", parse = TRUE, x = 8, y = 8, size = 10) +
  geom_smooth(method = "lm")

#################################################################################################
# FIGURE 3: TIME SERIES
#################################################################################################
# Two-way fixed-effects regressions of the (Ln) VC count on the (Ln) number of
# successful Kickstarter campaigns lagged by one, two and three years.
m1 <- plm(log_count_vc ~ lag(log_successful_ks, 1), data = fe, effect = "twoways", model = "within")
m2 <- plm(log_count_vc ~ lag(log_successful_ks, 2), data = fe, effect = "twoways", model = "within")
m3 <- plm(log_count_vc ~ lag(log_successful_ks, 3), data = fe, effect = "twoways", model = "within")

# One plotting row per model: estimate and standard error of the single
# regressor. `$coefficients` is spelled out; the original relied on partial
# matching of `$coef`.
coef_row <- function(model, variable, label) {
  est <- summary(model)$coefficients
  data.frame(
    Variable = variable,
    Coefficient = est[, 1],
    SE = est[, 2],
    Model = label
  )
}

allModelFrame <- rbind(
  coef_row(m1, "KS (t-1)", "1-Yr Lag"),
  coef_row(m2, "KS (t-2)", "2-Yr Lag"),
  coef_row(m3, "KS (t-3)", "3-Yr Lag")
)

# Specify the width of your confidence intervals
interval1 <- -qnorm((1 - 0.9) / 2)   # 90% multiplier
interval2 <- -qnorm((1 - 0.95) / 2)  # 95% multiplier

# Plot: thick bar = 90% interval, thin bar with point = 95% interval.
zp1 <- ggplot(allModelFrame, aes(colour = Model)) +
  # A horizontal reference line is positioned with `yintercept`;
  # `xintercept` is not a geom_hline() parameter.
  geom_hline(yintercept = 0, colour = gray(1/2), lty = 2) +
  geom_linerange(
    aes(x = Variable,
        ymin = Coefficient - SE * interval1,
        ymax = Coefficient + SE * interval1),
    lwd = 1, position = position_dodge(width = 1/2)
  ) +
  geom_pointrange(
    aes(x = Variable, y = Coefficient,
        ymin = Coefficient - SE * interval2,
        ymax = Coefficient + SE * interval2),
    lwd = 1/2, position = position_dodge(width = 1/2),
    shape = 21, fill = "WHITE"
  ) +
  scale_colour_brewer(palette = "Set1") +
  ylab("Effect of 1% Increase on VC Activity (t)") +
  xlab("Successful Number KS Campaigns (Ln)") +
  ylim(0, 0.115) +
  # The complete theme must come first: adding theme_classic() after theme()
  # replaces the whole theme and silently drops the settings below.
  theme_classic() +
  theme(
    legend.position = c(0, 0.4),
    text = element_text(size = 15),
    legend.text = element_text(size = 15),
    legend.justification = c(0, 1)
  )
#+ ggtitle("Within-Region Effects of Past Successful KS on\n Future Number VC Backed Companies")

print(zp1) # The trick to these is position_dodge().

###########################
# FIGURE 4: IV BY YEAR
###########################
# Read pre-computed yearly estimates. Columns 2 and 3 are used as the
# coefficient and its standard error; rows 1-7 are taken as 2009-2015.
# (attach() was dropped: nothing in this section used the attached names.)
data <- read.csv("coeffs.csv", header = TRUE, sep = ",")
out <- data[, 2:3]

years <- as.character(2009:2015)
allModelFrame <- data.frame(
  Variable = paste("KS", years),
  Coefficient = out[1:7, 1],
  SE = out[1:7, 2],
  Year = years
)

# Specify the width of your confidence intervals
interval1 <- -qnorm((1 - 0.9) / 2)   # 90% multiplier
interval2 <- -qnorm((1 - 0.95) / 2)  # 95% multiplier

# Plot: thick bar = 90% interval, thin bar with point = 95% interval.
zp1 <- ggplot(allModelFrame, aes(colour = Year)) +
  # A horizontal reference line is positioned with `yintercept`;
  # `xintercept` is not a geom_hline() parameter.
  geom_hline(yintercept = 0, colour = gray(1/2), lty = 2) +
  geom_linerange(
    aes(x = Variable,
        ymin = Coefficient - SE * interval1,
        ymax = Coefficient + SE * interval1),
    lwd = 1, position = position_dodge(width = 1/2)
  ) +
  geom_pointrange(
    aes(x = Variable, y = Coefficient,
        ymin = Coefficient - SE * interval2,
        ymax = Coefficient + SE * interval2),
    lwd = 1/2, position = position_dodge(width = 1/2),
    shape = 21, fill = "WHITE"
  ) +
  scale_colour_brewer(palette = "Set2") +
  ylab("Effect of 1% Increase on VC Activity") +
  xlab("Successful Number KS Campaigns (Ln)") +
  # The complete theme must come first: adding theme_classic() after theme()
  # replaces the whole theme and silently drops the settings below.
  theme_classic() +
  theme(
    text = element_text(size = 15),
    legend.text = element_text(size = 15),
    legend.justification = c(0, 1)
  )
 #+ ggtitle("Instrumental Variable Effects of\n Successful KS on Number VC Backed Companies")

print(zp1)

###########################
# FIGURE 4 new: IV BY YEAR
###########################
# Read pre-computed yearly estimates for three specifications. Columns 3 and 4
# are used as the coefficient and its standard error. Row layout used below:
#   rows  1-7  -> "Successful KS Campaigns", 2009-2015
#   rows  8-14 -> "Technology KS Campaigns", 2009-2015
#   rows 15-21 -> "All KS Campaigns",        2009-2015
# (attach() was dropped: nothing in this section used the attached names.)
data <- read.csv("coeffs_all.csv", header = TRUE, sep = ",")
out <- data[, 3:4]

years <- as.character(2009:2015)

# Seven rows (one per year) for one specification.
series_frame <- function(rows, label) {
  data.frame(
    Variable = years,
    Coefficient = out[rows, 1],
    SE = out[rows, 2],
    Model = label
  )
}

# Combine the three series in the same order as before.
allModelFrame <- rbind(
  series_frame(8:14, "Technology KS Campaigns"),
  series_frame(1:7, "Successful KS Campaigns"),
  series_frame(15:21, "All KS Campaigns")
)

# Specify the width of your confidence intervals
interval1 <- -qnorm((1 - 0.9) / 2)   # 90% multiplier
interval2 <- -qnorm((1 - 0.95) / 2)  # 95% multiplier

# Plot: thick bar = 90% interval, thin bar with point = 95% interval;
# the three specifications are dodged side by side within each year.
zp1 <- ggplot(allModelFrame, aes(colour = Model)) +
  # A horizontal reference line is positioned with `yintercept`;
  # `xintercept` is not a geom_hline() parameter.
  geom_hline(yintercept = 0, colour = gray(1/2), lty = 2) +
  geom_linerange(
    aes(x = Variable,
        ymin = Coefficient - SE * interval1,
        ymax = Coefficient + SE * interval1),
    lwd = 1, position = position_dodge(width = 1/2)
  ) +
  geom_pointrange(
    aes(x = Variable, y = Coefficient,
        ymin = Coefficient - SE * interval2,
        ymax = Coefficient + SE * interval2),
    lwd = 1/2, position = position_dodge(width = 1/2),
    shape = 21, fill = "WHITE"
  ) +
  scale_colour_brewer(palette = "Set1") +
  ylab("Effect of 1% Increase on VC Activity") +
  xlab("Year") +
  # The complete theme must come first: adding theme_classic() after theme()
  # replaces the whole theme and silently drops the settings below.
  theme_classic() +
  theme(
    text = element_text(size = 15),
    legend.text = element_text(size = 15),
    legend.justification = c(0, 1)
  )
 #+ ggtitle("Instrumental Variable Effects of\n Successful KS on Number VC Backed Companies")

print(zp1)

###############################################
# Appendix : IV BY YEAR - TECH ONLY 
###############################################
# Read pre-computed yearly estimates for technology campaigns. Columns 2 and 3
# are used as the coefficient and its standard error; rows 1-7 are taken as
# 2009-2015.
# (attach() was dropped: nothing in this section used the attached names.)
data <- read.csv("coeffs_tech.csv", header = TRUE, sep = ",")
out <- data[, 2:3]

years <- as.character(2009:2015)
allModelFrame <- data.frame(
  Variable = paste("KS", years),
  Coefficient = out[1:7, 1],
  SE = out[1:7, 2],
  Year = years
)

# Specify the width of your confidence intervals
interval1 <- -qnorm((1 - 0.9) / 2)   # 90% multiplier
interval2 <- -qnorm((1 - 0.95) / 2)  # 95% multiplier

# Plot: thick bar = 90% interval, thin bar with point = 95% interval.
zp1 <- ggplot(allModelFrame, aes(colour = Year)) +
  # A horizontal reference line is positioned with `yintercept`;
  # `xintercept` is not a geom_hline() parameter.
  geom_hline(yintercept = 0, colour = gray(1/2), lty = 2) +
  geom_linerange(
    aes(x = Variable,
        ymin = Coefficient - SE * interval1,
        ymax = Coefficient + SE * interval1),
    lwd = 1, position = position_dodge(width = 1/2)
  ) +
  geom_pointrange(
    aes(x = Variable, y = Coefficient,
        ymin = Coefficient - SE * interval2,
        ymax = Coefficient + SE * interval2),
    lwd = 1/2, position = position_dodge(width = 1/2),
    shape = 21, fill = "WHITE"
  ) +
  scale_colour_brewer(palette = "Set2") +
  ylab("Effect of 1% Increase on VC Backed") +
  xlab("Successful Technology KS Campaigns (Ln)") +
  # The complete theme must come first: adding theme_classic() after theme()
  # replaces the whole theme and silently drops the settings below.
  theme_classic() +
  theme(
    text = element_text(size = 15),
    legend.text = element_text(size = 15),
    legend.justification = c(0, 1)
  )
#+ ggtitle("Instrumental Variable Effects of\n Successful Technology KS on\nNumber VC Backed Companies")

print(zp1)

#################################################################################
# ADDITIONAL ANALYSES - CONCENTRATION & INEQUALITY
#################################################################################

library(ineq)

# Reload the county-year data as a plain data frame (no panel index this time).
fe <- read.csv("yearly_fe_final.csv", header = TRUE, sep = ",")

# Ln(x + 1) versions of the raw variables, using the script's own log().
fe$log_count_vc <- log(fe$vc_count + 1)
fe$log_amount_vc <- log(fe$vc_amount + 1)
fe$log_patents <- log(fe$num_patents + 1)
fe$log_citations <- log(fe$num_citations + 1)
fe$log_amount_ks <- log(fe$amount + 1)
fe$log_campaigns_ks <- log(fe$num_campaigns + 1)
fe$log_successful_ks <- log(fe$num_successful + 1)
fe$log_instrument_ks <- log(fe$instrument_successful + 1)
fe$log_successful_ks_100 <- log(fe$successful_100 + 1)

# NOTE(review): attach() puts every column of fe on the search path. The
# statements in this section all use fe$..., so it looks unnecessary, but it
# is kept in case later parts of the script rely on it.
attach(fe)

# Gini coefficients over all rows of fe (all years pooled).
gini_vc <- ineq(fe$vc_count, type = "Gini", na.rm = TRUE)
gini_ks <- ineq(fe$num_campaigns, type = "Gini", na.rm = TRUE)
gini_ks_succ <- ineq(fe$num_successful, type = "Gini", na.rm = TRUE)
gini_ks_succ_100 <- ineq(fe$successful_100, type = "Gini", na.rm = TRUE)

# Herfindahl concentration over all rows of fe (all years pooled).
hhi_vc <- conc(fe$vc_count, type = "Herfindahl", na.rm = TRUE)
hhi_ks <- conc(fe$num_campaigns, type = "Herfindahl", na.rm = TRUE)
hhi_ks_succ <- conc(fe$num_successful, type = "Herfindahl", na.rm = TRUE)
hhi_ks_succ_100 <- conc(fe$successful_100, type = "Herfindahl", na.rm = TRUE)


# COMPUTE HERFINDAHL CONCENTRATION INDEX OF VC INVESTMENT ACROSS COUNTIES,
# ONE VALUE PER YEAR, 2009-2015

# One data frame per year; these per-year objects are reused further down.
fe_09 <- subset(fe, year == 2009)
fe_10 <- subset(fe, year == 2010)
fe_11 <- subset(fe, year == 2011)
fe_12 <- subset(fe, year == 2012)
fe_13 <- subset(fe, year == 2013)
fe_14 <- subset(fe, year == 2014)
fe_15 <- subset(fe, year == 2015)

# Herfindahl index of the Ln VC amount, per year.
hhi_09 <- conc(fe_09$log_amount_vc, type = "Herfindahl", na.rm = TRUE)
hhi_10 <- conc(fe_10$log_amount_vc, type = "Herfindahl", na.rm = TRUE)
hhi_11 <- conc(fe_11$log_amount_vc, type = "Herfindahl", na.rm = TRUE)
hhi_12 <- conc(fe_12$log_amount_vc, type = "Herfindahl", na.rm = TRUE)
hhi_13 <- conc(fe_13$log_amount_vc, type = "Herfindahl", na.rm = TRUE)
hhi_14 <- conc(fe_14$log_amount_vc, type = "Herfindahl", na.rm = TRUE)
hhi_15 <- conc(fe_15$log_amount_vc, type = "Herfindahl", na.rm = TRUE)

# Mean Ln Kickstarter amount, per year (no NA removal).
ks_09 <- mean(fe_09$log_amount_ks)
ks_10 <- mean(fe_10$log_amount_ks)
ks_11 <- mean(fe_11$log_amount_ks)
ks_12 <- mean(fe_12$log_amount_ks)
ks_13 <- mean(fe_13$log_amount_ks)
ks_14 <- mean(fe_14$log_amount_ks)
ks_15 <- mean(fe_15$log_amount_ks)

# 7 x 1 matrices; kept as globals because the model fit below uses them.
hhi <- rbind(hhi_09, hhi_10, hhi_11, hhi_12, hhi_13, hhi_14, hhi_15)
ks <- rbind(ks_09, ks_10, ks_11, ks_12, ks_13, ks_14, ks_15)
year <- as.numeric(2009:2015)

# Plotting table with columns year, hhi, ks and default row names.
df <- data.frame(year = year, hhi = as.vector(hhi), ks = as.vector(ks))
library(ggplot2)

# Build a plotmath string "y = a + b . x, r^2 = ..." for a linear fit of
# `hhi` on `ks`.
#   df: data frame with numeric columns `hhi` and `ks`
# Returns a length-one character vector meant to be drawn with parse = TRUE.
lm_eqn <- function(df) {
  m <- lm(hhi ~ ks, data = df)
  # coef() returns a *named* vector; if the names survive format(), the label
  # is deparsed as c(`(Intercept)` = "...") instead of a plain number.
  est <- unname(coef(m))
  eq <- substitute(
    italic(y) == a + b %.% italic(x) * "," ~ ~italic(r)^2 ~ "=" ~ r2,
    list(
      a = format(est[1], digits = 2),
      b = format(est[2], digits = 2),
      r2 = format(summary(m)$r.squared, digits = 3)
    )
  )
  as.character(as.expression(eq))
}

# Yearly Herfindahl index against yearly mean Ln Kickstarter amount, with a
# linear fit and the fitted equation printed inside the panel.
ggplot(df, aes(x = ks, y = hhi)) +  # bare column names: no df$ inside aes()
  geom_point() +
  geom_smooth(method = "lm", se = TRUE) +
  xlab("Ln Amount Raised through Kickstarter") +
  ylab("Hirsch-Herfindahl Concentration Index of VC Investment") +
  theme_classic() +
  theme(text = element_text(size = 15)) +
  # annotate() draws the label once; geom_text(data = df, ...) drew the same
  # label once per row of df, stacked on top of itself.
  annotate("text", x = 1.9, y = 0.0072, label = lm_eqn(df), parse = TRUE, size = 7)

# Same regression as in the label, fitted on the 7 x 1 matrices hhi and ks.
modelhhi <- lm(hhi ~ ks)
cor(ks, hhi)

# COMPUTE LOCATIONAL GINI COEFFICIENTS OF VC INVESTMENT ACROSS COUNTIES,
# ONE VALUE PER YEAR, 2009-2015

# Gini coefficient of the Ln VC amount, per year.
gini_09 <- ineq(fe_09$log_amount_vc, type = "Gini", na.rm = TRUE)
gini_10 <- ineq(fe_10$log_amount_vc, type = "Gini", na.rm = TRUE)
gini_11 <- ineq(fe_11$log_amount_vc, type = "Gini", na.rm = TRUE)
gini_12 <- ineq(fe_12$log_amount_vc, type = "Gini", na.rm = TRUE)
gini_13 <- ineq(fe_13$log_amount_vc, type = "Gini", na.rm = TRUE)
gini_14 <- ineq(fe_14$log_amount_vc, type = "Gini", na.rm = TRUE)
gini_15 <- ineq(fe_15$log_amount_vc, type = "Gini", na.rm = TRUE)

# 7 x 1 matrices; kept as globals because the model fit below uses them.
gini <- rbind(gini_09, gini_10, gini_11, gini_12, gini_13, gini_14, gini_15)
ks <- rbind(ks_09, ks_10, ks_11, ks_12, ks_13, ks_14, ks_15)
year <- as.numeric(2009:2015)

# Plotting table with columns year, gini, ks and default row names.
df2 <- data.frame(year = year, gini = as.vector(gini), ks = as.vector(ks))
library(ggplot2)

# Build a plotmath string "y = a + b . x, r^2 = ..." for a linear fit of
# `gini` on `ks`.
#   df: data frame with numeric columns `gini` and `ks`
# Returns a length-one character vector meant to be drawn with parse = TRUE.
# The fit now uses the argument; it previously ignored it and always read the
# global `df2`.
lm_eqn <- function(df) {
  m <- lm(gini ~ ks, data = df)
  # coef() returns a *named* vector; if the names survive format(), the label
  # is deparsed as c(`(Intercept)` = "...") instead of a plain number.
  est <- unname(coef(m))
  eq <- substitute(
    italic(y) == a + b %.% italic(x) * "," ~ ~italic(r)^2 ~ "=" ~ r2,
    list(
      a = format(est[1], digits = 2),
      b = format(est[2], digits = 2),
      r2 = format(summary(m)$r.squared, digits = 3)
    )
  )
  as.character(as.expression(eq))
}

# Yearly Gini coefficient against yearly mean Ln Kickstarter amount, with a
# linear fit and the fitted equation printed inside the panel.
ggplot(df2, aes(x = ks, y = gini)) +  # bare column names: no df2$ inside aes()
  geom_point() +
  geom_smooth(method = "lm", se = TRUE) +
  xlab("Ln Amount Raised through Kickstarter") +
  ylab("Locational Gini Inequality Index of VC Investment") +
  theme_classic() +
  theme(text = element_text(size = 15)) +
  # The label is built from df2 (the Gini table), not df, and drawn once with
  # annotate() instead of once per row with geom_text(data = df2, ...).
  annotate("text", x = 2, y = .96, label = lm_eqn(df2), parse = TRUE, size = 7)

# Same regression as in the label, fitted on the 7 x 1 matrices gini and ks.
modelgini <- lm(gini ~ ks)
cor(gini, ks)

# Regression table for the two yearly models (HHI ~ KS and Gini ~ KS).
stargazer(modelhhi, modelgini)

# Combined figures
# One table holding year, hhi, ks and gini for the dual-axis plots below.
data <- cbind(df, gini)
rownames(data) <- NULL

# --- Gini (left axis) and Kickstarter amount (right axis) over time ---
par(mar = c(5, 5, 2, 5))  # extra right margin for the second axis
with(
  data,
  plot(year, gini,
       type = "o", col = "red3", pch = 2,
       ylab = "Locational Gini Inequality of Venture Capital Investment")
)
par(new = TRUE)  # draw the next plot on top of the current one
with(
  data,
  plot(year, ks,
       type = "o", col = "black", pch = 16,
       axes = FALSE, xlab = NA, ylab = NA, cex = 1.2)
)
axis(side = 4)
mtext(side = 4, line = 3, "Ln Amount Raised on Kickstarter")
legend(
  "topleft",
  legend = c("Gini Inequality", "Ln Amount Raised in USD"),
  lty = c(1, 1), pch = c(2, 16), col = c("red3", "black"), bg = "white"
)


# --- HHI (left axis) and Kickstarter amount (right axis) over time ---
par(mar = c(5, 5, 2, 5))
with(
  data,
  plot(year, hhi,
       type = "o", col = "blue", pch = 5,
       ylab = "Hirsch-Herfindahl Index of Venture Capital Investment")
)
par(new = TRUE)
with(
  data,
  plot(year, ks,
       type = "o", col = "black", pch = 16,
       axes = FALSE, xlab = NA, ylab = NA, cex = 1.2)
)
axis(side = 4)
mtext(side = 4, line = 3, "Ln Amount Raised on Kickstarter")
legend(
  "topleft",
  legend = c("Hirsch-Herfindahl Index", "Ln Amount Raised in USD"),
  lty = c(1, 1), pch = c(5, 16), col = c("blue", "black"), bg = "white"
)

#------------------------------------------------------------------
#------------------------------------------------------------------
# Gini - Inequality dynamics
#------------------------------------------------------------------
#------------------------------------------------------------------
library(boot)
library(ineq)
fe <- read.csv("gini.csv", header = TRUE, sep = ",")

# Append the four per-capita measures to one year's rows.
#   d:   data frame for a single year
#   pop: population vector used as the denominator for that year
add_per_capita <- function(d, pop) {
  d$vc_pc <- d$vc_count / pop
  d$ks_pc <- d$num_campaigns / pop
  d$succ_pc <- d$num_successful / pop
  d$s100_pc <- d$successful_100 / pop
  d
}

# One data frame per year, each scaled by that year's population column.
# 2009 is scaled by estimates_base_2010; no 2009-specific population column
# is referenced here.
fe_09 <- subset(fe, year == 2009)
fe_09 <- add_per_capita(fe_09, fe_09$estimates_base_2010)

fe_10 <- subset(fe, year == 2010)
fe_10 <- add_per_capita(fe_10, fe_10$pop_estimate_2010)

fe_11 <- subset(fe, year == 2011)
fe_11 <- add_per_capita(fe_11, fe_11$pop_estimate_2011)

fe_12 <- subset(fe, year == 2012)
fe_12 <- add_per_capita(fe_12, fe_12$pop_estimate_2012)

fe_13 <- subset(fe, year == 2013)
fe_13 <- add_per_capita(fe_13, fe_13$pop_estimate_2013)

fe_14 <- subset(fe, year == 2014)
fe_14 <- add_per_capita(fe_14, fe_14$pop_estimate_2014)

fe_15 <- subset(fe, year == 2015)
fe_15 <- add_per_capita(fe_15, fe_15$pop_estimate_2015)

# Gini coefficient of per-capita VC counts, per year.
gini1_09 <- ineq(fe_09$vc_pc, type = "Gini", na.rm = TRUE)
gini1_10 <- ineq(fe_10$vc_pc, type = "Gini", na.rm = TRUE)
gini1_11 <- ineq(fe_11$vc_pc, type = "Gini", na.rm = TRUE)
gini1_12 <- ineq(fe_12$vc_pc, type = "Gini", na.rm = TRUE)
gini1_13 <- ineq(fe_13$vc_pc, type = "Gini", na.rm = TRUE)
gini1_14 <- ineq(fe_14$vc_pc, type = "Gini", na.rm = TRUE)
gini1_15 <- ineq(fe_15$vc_pc, type = "Gini", na.rm = TRUE)

# Bootstrap statistic for boot::boot(): Gini coefficient of one resample.
#   d: numeric vector of observations (the `data` argument given to boot())
#   i: resample indices supplied by boot() for this replicate
# Returns the Gini coefficient of d[i].
#
# The previous version ignored `i` and drew its own sample of size
# length(data), where `data` is whatever global object has that name at call
# time (not the vector `d`), so the resample size was unrelated to the number
# of observations.
f <- function(d, i) {
  ineq(d[i], type = "Gini", na.rm = TRUE)
}

# For each measure and year: 1000 bootstrap replicates of the Gini statistic f,
# then the standard deviation of the replicates as the standard error.
# The boot() calls are kept in their original order so the random-number
# sequence is consumed in the same way. No seed is set in this section, so the
# standard errors vary from run to run.

# --- Measure 1: per-capita VC counts (point estimates gini1_* computed above) ---
boot1_09 <- boot(data = fe_09$vc_pc, f, R = 1000)
se1_09 <- sd(boot1_09$t)
boot1_10 <- boot(data = fe_10$vc_pc, f, R = 1000)
se1_10 <- sd(boot1_10$t)
boot1_11 <- boot(data = fe_11$vc_pc, f, R = 1000)
se1_11 <- sd(boot1_11$t)
boot1_12 <- boot(data = fe_12$vc_pc, f, R = 1000)
se1_12 <- sd(boot1_12$t)
boot1_13 <- boot(data = fe_13$vc_pc, f, R = 1000)
se1_13 <- sd(boot1_13$t)
boot1_14 <- boot(data = fe_14$vc_pc, f, R = 1000)
se1_14 <- sd(boot1_14$t)
boot1_15 <- boot(data = fe_15$vc_pc, f, R = 1000)
se1_15 <- sd(boot1_15$t)

# --- Measure 2: per-capita Kickstarter campaigns ---
gini2_09 <- ineq(fe_09$ks_pc, type = "Gini", na.rm = TRUE)
boot2_09 <- boot(data = fe_09$ks_pc, f, R = 1000)
se2_09 <- sd(boot2_09$t)
gini2_10 <- ineq(fe_10$ks_pc, type = "Gini", na.rm = TRUE)
boot2_10 <- boot(data = fe_10$ks_pc, f, R = 1000)
se2_10 <- sd(boot2_10$t)
gini2_11 <- ineq(fe_11$ks_pc, type = "Gini", na.rm = TRUE)
boot2_11 <- boot(data = fe_11$ks_pc, f, R = 1000)
se2_11 <- sd(boot2_11$t)
gini2_12 <- ineq(fe_12$ks_pc, type = "Gini", na.rm = TRUE)
boot2_12 <- boot(data = fe_12$ks_pc, f, R = 1000)
se2_12 <- sd(boot2_12$t)
gini2_13 <- ineq(fe_13$ks_pc, type = "Gini", na.rm = TRUE)
boot2_13 <- boot(data = fe_13$ks_pc, f, R = 1000)
se2_13 <- sd(boot2_13$t)
gini2_14 <- ineq(fe_14$ks_pc, type = "Gini", na.rm = TRUE)
boot2_14 <- boot(data = fe_14$ks_pc, f, R = 1000)
se2_14 <- sd(boot2_14$t)
gini2_15 <- ineq(fe_15$ks_pc, type = "Gini", na.rm = TRUE)
boot2_15 <- boot(data = fe_15$ks_pc, f, R = 1000)
se2_15 <- sd(boot2_15$t)

# --- Measure 3: per-capita successful Kickstarter campaigns ---
gini3_09 <- ineq(fe_09$succ_pc, type = "Gini", na.rm = TRUE)
boot3_09 <- boot(data = fe_09$succ_pc, f, R = 1000)
se3_09 <- sd(boot3_09$t)
gini3_10 <- ineq(fe_10$succ_pc, type = "Gini", na.rm = TRUE)
boot3_10 <- boot(data = fe_10$succ_pc, f, R = 1000)
se3_10 <- sd(boot3_10$t)
gini3_11 <- ineq(fe_11$succ_pc, type = "Gini", na.rm = TRUE)
boot3_11 <- boot(data = fe_11$succ_pc, f, R = 1000)
se3_11 <- sd(boot3_11$t)
gini3_12 <- ineq(fe_12$succ_pc, type = "Gini", na.rm = TRUE)
boot3_12 <- boot(data = fe_12$succ_pc, f, R = 1000)
se3_12 <- sd(boot3_12$t)
gini3_13 <- ineq(fe_13$succ_pc, type = "Gini", na.rm = TRUE)
boot3_13 <- boot(data = fe_13$succ_pc, f, R = 1000)
se3_13 <- sd(boot3_13$t)
gini3_14 <- ineq(fe_14$succ_pc, type = "Gini", na.rm = TRUE)
boot3_14 <- boot(data = fe_14$succ_pc, f, R = 1000)
se3_14 <- sd(boot3_14$t)
gini3_15 <- ineq(fe_15$succ_pc, type = "Gini", na.rm = TRUE)
boot3_15 <- boot(data = fe_15$succ_pc, f, R = 1000)
se3_15 <- sd(boot3_15$t)

# --- Measure 4: per-capita values of successful_100 ---
gini4_09 <- ineq(fe_09$s100_pc, type = "Gini", na.rm = TRUE)
boot4_09 <- boot(data = fe_09$s100_pc, f, R = 1000)
se4_09 <- sd(boot4_09$t)
gini4_10 <- ineq(fe_10$s100_pc, type = "Gini", na.rm = TRUE)
boot4_10 <- boot(data = fe_10$s100_pc, f, R = 1000)
se4_10 <- sd(boot4_10$t)
gini4_11 <- ineq(fe_11$s100_pc, type = "Gini", na.rm = TRUE)
boot4_11 <- boot(data = fe_11$s100_pc, f, R = 1000)
se4_11 <- sd(boot4_11$t)
gini4_12 <- ineq(fe_12$s100_pc, type = "Gini", na.rm = TRUE)
boot4_12 <- boot(data = fe_12$s100_pc, f, R = 1000)
se4_12 <- sd(boot4_12$t)
gini4_13 <- ineq(fe_13$s100_pc, type = "Gini", na.rm = TRUE)
boot4_13 <- boot(data = fe_13$s100_pc, f, R = 1000)
se4_13 <- sd(boot4_13$t)
gini4_14 <- ineq(fe_14$s100_pc, type = "Gini", na.rm = TRUE)
boot4_14 <- boot(data = fe_14$s100_pc, f, R = 1000)
se4_14 <- sd(boot4_14$t)
gini4_15 <- ineq(fe_15$s100_pc, type = "Gini", na.rm = TRUE)
boot4_15 <- boot(data = fe_15$s100_pc, f, R = 1000)
se4_15 <- sd(boot4_15$t)

# Collect the yearly point estimates and standard errors, 2009 through 2015.
gini_vc <- c(gini1_09, gini1_10, gini1_11, gini1_12, gini1_13, gini1_14, gini1_15)
se_vc <- c(se1_09, se1_10, se1_11, se1_12, se1_13, se1_14, se1_15)

gini_ks <- c(gini2_09, gini2_10, gini2_11, gini2_12, gini2_13, gini2_14, gini2_15)
se_ks <- c(se2_09, se2_10, se2_11, se2_12, se2_13, se2_14, se2_15)

gini_ks_succ <- c(gini3_09, gini3_10, gini3_11, gini3_12, gini3_13, gini3_14, gini3_15)
se_ks_succ <- c(se3_09, se3_10, se3_11, se3_12, se3_13, se3_14, se3_15)

gini_ks_succ_100 <- c(gini4_09, gini4_10, gini4_11, gini4_12, gini4_13, gini4_14, gini4_15)
se_ks_succ_100 <- c(se4_09, se4_10, se4_11, se4_12, se4_13, se4_14, se4_15)

# Save the yearly Gini estimates and bootstrap standard errors, then read them
# back and plot all four series with 95% intervals (estimate +/- 1.96 * SE).
year <- c(2009, 2010, 2011, 2012, 2013, 2014, 2015)
coeffs_gini <- cbind(
  year,
  gini_vc, se_vc,
  gini_ks, se_ks,
  gini_ks_succ, se_ks_succ,
  gini_ks_succ_100, se_ks_succ_100
)
write.csv(coeffs_gini, file = "coeffs_gini.csv", row.names = FALSE, na = "")

# `na.strings` is spelled out (the original `na =` relied on partial argument
# matching). attach(data) was dropped: every attached name was shadowed by a
# global of the same name, so columns are now read from `data` explicitly.
data <- read.csv("coeffs_gini.csv", header = TRUE, sep = ",", na.strings = "")

x <- 1:7  # one position per year, 2009-2015

# Axes and annotations are suppressed here and drawn by hand further down.
plot(x, data$gini_vc, type = "o", col = "red",
     ylim = c(0.75, 1), axes = FALSE, ann = FALSE)
arrows(x, data$gini_vc - 1.96 * data$se_vc,
       x, data$gini_vc + 1.96 * data$se_vc,
       length = 0.05, angle = 90, code = 3, col = "red")

lines(data$gini_ks, type = "o", col = "blue", pch = 22, lty = 2)
arrows(x, data$gini_ks - 1.96 * data$se_ks,
       x, data$gini_ks + 1.96 * data$se_ks,
       length = 0.05, angle = 90, code = 3, col = "blue")

lines(data$gini_ks_succ, type = "o", col = "darkgreen", pch = 23, lty = 3)
arrows(x, data$gini_ks_succ - 1.96 * data$se_ks_succ,
       x, data$gini_ks_succ + 1.96 * data$se_ks_succ,
       length = 0.05, angle = 90, code = 3, col = "darkgreen")

lines(data$gini_ks_succ_100, type = "o", col = "purple", pch = 24, lty = 4)
arrows(x, data$gini_ks_succ_100 - 1.96 * data$se_ks_succ_100,
       x, data$gini_ks_succ_100 + 1.96 * data$se_ks_succ_100,
       length = 0.05, angle = 90, code = 3, col = "purple")

axis(1, at = 1:7, lab = c("2009", "2010", "2011", "2012", "2013", "2014", "2015"))
axis(2, las = 1, at = c(0.75, 0.80, 0.85, 0.9, 0.95, 1))
box()
title(xlab = "Year")
title(ylab = "Gini Coefficient")
title(main = "Inequality Dynamics, 2009-2015")
legend(1, 0.81,
       c("Venture capital rounds", "Kickstarter campaigns",
         "Successful Kickstarter campaigns",
         "Successful Kickstarter campaigns > $100K"),
       cex = 0.8, col = c("red", "blue", "darkgreen", "purple"),
       pch = 21:24, lty = 1:4, bg = "white")

# Only Successful KS and VC
# Gini coefficients with +/- 1.96 * SE error bars for venture capital and
# successful Kickstarter campaigns only, on a y range of 0.9-1. Reads the
# columns of `data` (the contents of coeffs_gini.csv).
x <- 1:7  # one position per year; labelled 2009-2015 on the axis below

# Axes and annotations are suppressed here and added explicitly afterwards.
plot(x, data$gini_vc, type = "o", col = "red", ylim = c(0.9, 1),
     axes = FALSE, ann = FALSE)
arrows(x, data$gini_vc - 1.96 * data$se_vc,
       x, data$gini_vc + 1.96 * data$se_vc,
       length = 0.05, angle = 90, code = 3, col = "red")

lines(x, data$gini_ks_succ, type = "o", col = "blue", pch = 23, lty = 3)
arrows(x, data$gini_ks_succ - 1.96 * data$se_ks_succ,
       x, data$gini_ks_succ + 1.96 * data$se_ks_succ,
       length = 0.05, angle = 90, code = 3, col = "blue")

axis(1, at = 1:7, lab = c("2009", "2010", "2011", "2012", "2013", "2014", "2015"))
axis(2, las = 1, at = c(0.9, 0.92, 0.94, 0.96, 0.98, 1))
box()
title(xlab = "Year")
title(ylab = "Gini Coefficient")
title(main = "")
# The legend must mirror how each series is drawn: circle + solid line for
# venture capital, pch 23 + lty 3 for successful Kickstarter campaigns.
legend(1, 0.92,
       c("Venture capital rounds", "Successful Kickstarter campaigns"),
       cex = 0.8, col = c("red", "blue"),
       pch = c(21, 23), lty = c(1, 3), bg = "white")


#------------------------------------------------------------------
# HHI - comparison between Kickstarter and venture capital
#------------------------------------------------------------------

# Herfindahl concentration of one count column, with missing values removed.
# NOTE(review): conc() is presumably ineq::conc, loaded earlier in the file --
# confirm.
herfindahl_of <- function(counts) {
  conc(counts, type = "Herfindahl", na.rm = TRUE)
}

# Series 1: venture capital counts, one value per yearly data frame.
hhi1_09 <- herfindahl_of(fe_09$vc_count)
hhi1_10 <- herfindahl_of(fe_10$vc_count)
hhi1_11 <- herfindahl_of(fe_11$vc_count)
hhi1_12 <- herfindahl_of(fe_12$vc_count)
hhi1_13 <- herfindahl_of(fe_13$vc_count)
hhi1_14 <- herfindahl_of(fe_14$vc_count)
hhi1_15 <- herfindahl_of(fe_15$vc_count)

# Series 2: all Kickstarter campaigns.
hhi2_09 <- herfindahl_of(fe_09$num_campaigns)
hhi2_10 <- herfindahl_of(fe_10$num_campaigns)
hhi2_11 <- herfindahl_of(fe_11$num_campaigns)
hhi2_12 <- herfindahl_of(fe_12$num_campaigns)
hhi2_13 <- herfindahl_of(fe_13$num_campaigns)
hhi2_14 <- herfindahl_of(fe_14$num_campaigns)
hhi2_15 <- herfindahl_of(fe_15$num_campaigns)

# Series 3: successful Kickstarter campaigns.
hhi3_09 <- herfindahl_of(fe_09$num_successful)
hhi3_10 <- herfindahl_of(fe_10$num_successful)
hhi3_11 <- herfindahl_of(fe_11$num_successful)
hhi3_12 <- herfindahl_of(fe_12$num_successful)
hhi3_13 <- herfindahl_of(fe_13$num_successful)
hhi3_14 <- herfindahl_of(fe_14$num_successful)
hhi3_15 <- herfindahl_of(fe_15$num_successful)

# Series 4: the successful_100 column.
hhi4_09 <- herfindahl_of(fe_09$successful_100)
hhi4_10 <- herfindahl_of(fe_10$successful_100)
hhi4_11 <- herfindahl_of(fe_11$successful_100)
hhi4_12 <- herfindahl_of(fe_12$successful_100)
hhi4_13 <- herfindahl_of(fe_13$successful_100)
hhi4_14 <- herfindahl_of(fe_14$successful_100)
hhi4_15 <- herfindahl_of(fe_15$successful_100)

# Stack the yearly values (2009-2015, in order) into one vector per series.
hhi_vc <- c(
  hhi1_09, hhi1_10, hhi1_11, hhi1_12, hhi1_13, hhi1_14, hhi1_15
)
hhi_ks <- c(
  hhi2_09, hhi2_10, hhi2_11, hhi2_12, hhi2_13, hhi2_14, hhi2_15
)
hhi_ks_succ <- c(
  hhi3_09, hhi3_10, hhi3_11, hhi3_12, hhi3_13, hhi3_14, hhi3_15
)
hhi_ks_succ_100 <- c(
  hhi4_09, hhi4_10, hhi4_11, hhi4_12, hhi4_13, hhi4_14, hhi4_15
)

# Figure: yearly Herfindahl concentration values (2009-2015) for venture
# capital and the three Kickstarter series. Axes and annotations are suppressed
# in plot() and added explicitly afterwards, so no labels are passed to plot()
# itself (they would be ignored under ann = FALSE).
plot(hhi_vc, type = "o", col = "red", ylim = c(0, 0.27),
     axes = FALSE, ann = FALSE)
lines(hhi_ks, type = "o", col = "blue", pch = 22, lty = 2)
lines(hhi_ks_succ, type = "o", col = "darkgreen", pch = 23, lty = 3)
lines(hhi_ks_succ_100, type = "o", col = "purple", pch = 24, lty = 4)

axis(1, at = 1:7, lab = c("2009", "2010", "2011", "2012", "2013", "2014", "2015"))
axis(2, las = 1, at = seq(0, 0.3, 0.05))
box()
title(xlab = "Year")
# The index is named after Herfindahl and Hirschman.
title(ylab = "Herfindahl-Hirschman Index (HHI)")
title(main = "Concentration Dynamics, 2009-2015")
# Legend entries are listed in the same order the series are drawn above.
legend(3.3, 0.278,
       c("Venture capital rounds", "Kickstarter campaigns",
         "Successful Kickstarter campaigns",
         "Successful Kickstarter campaigns > $100K"),
       cex = 0.8, col = c("red", "blue", "darkgreen", "purple"),
       pch = 21:24, lty = 1:4, bg = "white")
   